##Redistricting Simulation Script NC#############
####################################################################
####pkgs
library(redist) # we used version 2.0.2, which was the latest version when we did the analysis
library(foreign)
library(tidyr)
library(dplyr)
library(sp) # basic spatial object handling
library(raster) # raster (pixel) object handling
library(rgdal) # shapefile access
library(rgeos) # geometry operations
library(reshape2)
library(stringi)
library(stringr)
library(splitstackshape)
library(data.table)
library(ggplot2)
library(scales)
library(devtools)
#library(rstudioapi)
##install the arealOverlap pkg , which I created 
#devtools::install_github("https://github.com/jcuriel-unc/arealOverlap2",subdir="arealOverlap")
library(arealOverlap)
library(wru)
#devtools::install_github("https://github.com/jcuriel-unc/zipWRUext",subdir="zipWRUext2")
library(zipWRUext2)
###set directory 
#data_wd <- dirname(rstudioapi::getActiveDocumentContext()$path)
#setwd(data_wd)

##get start time 
start_time_all <- Sys.time()

#### Load in congressional shapefiles 
#source: https://www.census.gov/geographies/mapping-files/time-series/geo/carto-boundary-file.html
cong_shp <- readOGR("shpfiles/congress", "cb_2018_us_cd116_500k")

###now load the state of interest, in this case NC 
nc_cong <- subset(cong_shp, STATEFP == "37" )

###now let's load in the NC data 
#source: https://dl.ncsbe.gov/?prefix=PrecinctMaps/
nc_prec <- readOGR("shpfiles/nc_precinct" ,"SBE_PRECINCTS_09012012")
head(nc_prec@data)

###we will now want to read in a voterfile from the same date as the prec shpfile 
nc_voter_reg <- readRDS("shpfiles/nc_precinct/import_vr/vr2012snapshot.rds")

###now let's create values for: white, black, other ; dem, gop, other_party
nc_voter_reg$race_code <- trimws(nc_voter_reg$race_code)
nc_voter_reg$white <- 0
nc_voter_reg$white[nc_voter_reg$race_code=="W"] <- 1
nc_voter_reg$black <- 0
nc_voter_reg$black[nc_voter_reg$race_code=="B"] <- 1
nc_voter_reg$other_race <- 0
nc_voter_reg$other_race[nc_voter_reg$race_code!="W" & nc_voter_reg$race_code != "B"] <- 1

##party 
nc_voter_reg$democrat <- 0
nc_voter_reg$democrat[nc_voter_reg$party_desc=="DEMOCRATIC"] <- 1
nc_voter_reg$gop <- 0
nc_voter_reg$gop[nc_voter_reg$party_desc=="REPUBLICAN"] <- 1
nc_voter_reg$other_party <- 0
nc_voter_reg$other_party[nc_voter_reg$party_desc!="REPUBLICAN" & nc_voter_reg$party_desc!="DEMOCRATIC"] <- 1

##check zip code field
str(nc_voter_reg$zip_code) #appears to be weird and messed up; fix 
nc_voter_reg$zip_code2 <- as.numeric(gsub("([0-9]+).*$", "\\1", nc_voter_reg$zip_code))
str(nc_voter_reg$zip_code2) # seems better; now convert back to string 
nc_voter_reg$zip_code2 <- str_pad(nc_voter_reg$zip_code2, pad="0", side="left",width = 5)
#make sure to grab only first 5 values as well 
nc_voter_reg$zip_code2 <- substr(nc_voter_reg$zip_code2, 1, 5)

sort(unique(nc_voter_reg$zip_code2)) # some zip codes in other states; 
# TN, GA, 

ga_sub <- subset(nc_voter_reg, zip_code2=="30546")
tn_sub <- subset(nc_voter_reg, zip_code2=="37691" | zip_code2=="37821")
nc_voter_reg <- subset(nc_voter_reg, zip_code2!="30546" & zip_code2!="37691" & zip_code2!="37821")


### Let's now run BISG
nc_voter_reg <- zip_wru(nc_voter_reg, state="NORTH CAROLINA", type1="census", year1="2010", 
                                zip_col="zip_code2", surname_field = "last_name")
sum(is.na(nc_voter_reg$pred.whi)) # 25847 are missing; let's do county

census.nc_county <- get_census_data("b85306550d1fd788ddc045abfa6acf6ba7110abc",state=c("NC"),age=FALSE,sex=FALSE,
                             census.geo="county")
###now let's subset the nc voter reg file 
###now let's do counties; we will want to merge on the info 
county_fips <- read.csv("data/county-fips-codes.csv")
county_fips <- subset(county_fips, state =="North Carolina")

nc_voter_reg <- merge(nc_voter_reg,county_fips,by.x="county_desc",by.y="county_name",all.x=T )
sum(is.na(nc_voter_reg$county_fips))###good, nothing is missing 
head(nc_voter_reg$county_fips)
nc_voter_reg$county <- substr(nc_voter_reg$county_fips, 3,5)
colnames(nc_voter_reg)[colnames(nc_voter_reg)=="state"] <- "state_name"
nc_voter_reg$state <- "NC"

## Now subset out 
nc_voter_reg_missing <- subset(nc_voter_reg, is.na(pred.whi)==TRUE)
nc_voter_reg <- subset(nc_voter_reg, is.na(pred.whi)==FALSE)

###drop the columns not of interest from the missing data 
nc_voter_reg_missing <- subset(nc_voter_reg_missing, select=-c(surname.match,pred.whi,pred.bla,pred.his,pred.asi,
                                                               pred.oth))
###Need to fix the county field here. 

####let's now run the BISG here for county 
nc_voter_reg_missing <- predict_race(nc_voter_reg_missing, census.geo = "county", census.data = census.nc_county,
                                     age=FALSE, sex=FALSE)
sum(is.na(nc_voter_reg_missing$pred.whi)) #nothing missing; should be able to join back on now 
names(nc_voter_reg_missing) # let's add on the surname.match field 
nc_voter_reg_missing$surname.match <- ""

###now let's bind again ; seems to be a problem here
nc_voter_reg <- rbind(nc_voter_reg, nc_voter_reg_missing)

##assign race categorically 
nc_voter_reg <- race_herfindahl_scores(nc_voter_reg)

### now create new dummy var 
nc_voter_reg$white_plural <- 0
nc_voter_reg$white_plural[nc_voter_reg$plural_race=="white"] <- 1
nc_voter_reg$black_plural <- 0
nc_voter_reg$black_plural[nc_voter_reg$plural_race=="black"] <- 1
nc_voter_reg$other_plural <- 0
nc_voter_reg$other_plural[nc_voter_reg$plural_race!="white" & nc_voter_reg$plural_race!="black"] <- 1

##check to make sure it matches 
sum(nc_voter_reg$white_plural) - sum(nc_voter_reg$pred.whi)
sum(nc_voter_reg$black_plural) - sum(nc_voter_reg$pred.bla)
sum(nc_voter_reg$other_plural) 


##save data 
saveRDS(nc_voter_reg, "data/nc_voter_reg_cleanedwbisg.rds")
nc_voter_reg <- readRDS("data/nc_voter_reg_cleanedwbisg.rds")

####Create the ggplot density plot here

######## Figure S2(a) #############################
nc_voter_reg$eff_num_races <- (1/nc_voter_reg$herf_weight)
summary(nc_voter_reg$eff_num_races)
nc_density_herf <-  ggplot(nc_voter_reg, aes(x=eff_num_races)) +
  geom_density(alpha=0.5, fill="blue") + xlim(1,5) + 
  labs(title="NC BISG Uncertainty",y="Density",x="Effective Number of Races") + theme_minimal()
nc_density_herf
ggsave("plots/nc_herfindahl_density.png" ,plot=nc_density_herf, scale=1,width=9,height=6,units = c("in"),dpi=600 )
######################################################


#plot error rate by herfindahl index:

nc_voter_reg$err_b_white <- abs(nc_voter_reg$pred.whi - nc_voter_reg$white)
nc_voter_reg$err_b_black <- abs(nc_voter_reg$pred.bla - nc_voter_reg$black)

nc_voter_reg$err_p_white <- abs(nc_voter_reg$white_plural - nc_voter_reg$white)
nc_voter_reg$err_p_black <- abs(nc_voter_reg$black_plural - nc_voter_reg$black)


#collapse errors down to zip code level, for plotting

nc_voter_reg_herf <- nc_voter_reg %>% 
  group_by(zcta5) %>% 
  summarise(eff_num_races=mean(eff_num_races), err_b_white=mean(err_b_white), err_b_black=mean(err_b_black), 
            err_p_white=mean(err_p_white), err_p_black=mean(err_p_black))



############# Figure S3 ########################### 
err_b_white_herf <-  ggplot(nc_voter_reg_herf, aes(x=eff_num_races, y=err_b_white)) +
  geom_smooth(method=loess) + expand_limits(y = c(0, 0.5)) + xlim(1,3) + theme_minimal() +labs(title="BISG Prob. Sums (PSM) Average Error - White",y="Average Error",x="Effective number of races")
err_b_white_herf
ggsave("plots/nc_err_b_white_herf.png" ,plot=err_b_white_herf, scale=1,width=9,height=6,units = c("in"),dpi=600 )

err_b_black_herf <-  ggplot(nc_voter_reg_herf, aes(x=eff_num_races, y=err_b_black)) +
  geom_smooth(method=loess) + expand_limits(y = c(0, 0.8)) + xlim(1,3) + theme_minimal() +labs(title="BISG Prob. Sums (PSM) Average Error - Black",y="Average Error",x="Effective number of races")
err_b_black_herf
ggsave("plots/nc_err_b_black_herf.png" ,plot=err_b_black_herf, scale=1,width=9,height=6,units = c("in"),dpi=600 )

err_p_white_herf <-  ggplot(nc_voter_reg_herf, aes(x=eff_num_races, y=err_p_white)) +
  geom_smooth(method=loess) + expand_limits(y = c(0, 0.5)) + xlim(1,3) + theme_minimal() +labs(title="Plurality (PM) Average Error - White",y="Average Error",x="Effective number of races")
err_p_white_herf
ggsave("plots/nc_err_p_white_herf.png" ,plot=err_p_white_herf, scale=1,width=9,height=6,units = c("in"),dpi=600 )

err_p_black_herf <-  ggplot(nc_voter_reg_herf, aes(x=eff_num_races, y=err_p_black)) +
  geom_smooth(method=loess) + expand_limits(y = c(0, 0.8)) + xlim(1,3) + theme_minimal() +labs(title="Plurality (PM) Average Error - Black",y="Average Error",x="Effective number of races")
err_p_black_herf
ggsave("plots/nc_err_p_black_herf.png" ,plot=err_p_black_herf, scale=1,width=9,height=6,units = c("in"),dpi=600 )
###################################################





###now let's sum the results by precinct 
nc_voter_reg_col <- nc_voter_reg %>% 
  group_by(county_desc,county_id,precinct_abbrv,precinct_desc) %>% 
  summarise(white=sum(white), black=sum(black), other_race=sum(other_race), democrat=sum(democrat),
            gop=sum(gop),other_party=sum(other_party),bisg_whi=sum(pred.whi), bisg_bla=sum(pred.bla),
            bisg_oth_all=sum(pred.asi+pred.his+pred.oth), white_plural=sum(white_plural),
            black_plural=sum(black_plural), other_plural=sum(other_plural))
nc_voter_reg_col$total = nc_voter_reg_col$democrat+nc_voter_reg_col$gop+nc_voter_reg_col$other_party

#get rid of extra white space 
nc_voter_reg_col$precinct_desc <- trimws(nc_voter_reg_col$precinct_desc, "both")
nc_voter_reg_col$precinct_abbrv <- trimws(nc_voter_reg_col$precinct_abbrv, "both")

sort(unique(nc_voter_reg_col$precinct_desc))
##check the missing prec data 
nc_voter_reg_col_missing <- subset(nc_voter_reg_col,precinct_abbrv=="" )
sum(nc_voter_reg_col_missing$total) # we will want to note that these people are missing: 6084

##remove missing precs 
nc_voter_reg_col <- subset(nc_voter_reg_col,precinct_abbrv!="" )
nrow(nc_voter_reg_col)
nrow(nc_prec) - nrow(nc_voter_reg_col) # 18 precs missing 


###read in the block data 
nc_blocks <- readOGR("shpfiles/block_data", "tl_2010_37_tabblock10" )

##grab the demos 
census.nc_block <- get_census_data("b85306550d1fd788ddc045abfa6acf6ba7110abc",state=c("NC"),age=FALSE,sex=FALSE,
                                    census.geo="block")
saveRDS(census.nc_block, "data/census_nc_blockstats.rds")
census.nc_block <- readRDS("data/census_nc_blockstats.rds")
class(census.nc_block)
length(census.nc_block)
census.nc_block[[1]]
census.nc_block2 <- census.nc_block$NC$block

###let's rename some of the fields 
colnames(census.nc_block2)[colnames(census.nc_block2)=="P005003"] <- "white"
colnames(census.nc_block2)[colnames(census.nc_block2)=="P005004"] <- "black"
census.nc_block2$other_races <- rowSums(census.nc_block2[,7:12])
summary(census.nc_block2$other_races)
##subset the block demos 
census.nc_block2 <- subset(census.nc_block2, select=c(state,county,tract,block,white,black,other_races))
colnames(census.nc_block2)[2:4] <- c("COUNTYFP10", "TRACTCE10", "BLOCKCE10")
#merge on data 
nc_blocks <- merge(nc_blocks,census.nc_block2, by=c("COUNTYFP10", "TRACTCE10", "BLOCKCE10") )

###let's get point data 
nc_geocoded_blocks <- SpatialPointsDataFrame(coords=coordinates(nc_blocks),data=nc_blocks@data,
                                      proj4string = 
                                        CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))
nc_prec <- spTransform(nc_prec, CRS=CRS("+proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0"))
###can now overlay

nc_geocoded_blocks$PREC_ID <- over(nc_geocoded_blocks,nc_prec)$PREC_ID
nc_geocoded_blocks$ENR_DESC <- over(nc_geocoded_blocks,nc_prec)$ENR_DESC
nc_geocoded_blocks$COUNTY_NAM <- over(nc_geocoded_blocks,nc_prec)$COUNTY_NAM

###now collapse the data 
nc_prec_demos <- nc_geocoded_blocks@data %>% group_by(COUNTY_NAM,PREC_ID,ENR_DESC) %>%
  summarise(white=sum(white),black=sum(black),other_races=sum(other_races))

###merge 
nc_prec <- merge(nc_prec,nc_prec_demos, by=c("COUNTY_NAM","PREC_ID","ENR_DESC") )

saveRDS(nc_prec, "data/nc_precincts_demos.rds")
nc_prec <- readRDS("data/nc_precincts_demos.rds")
class(nc_prec)
nc_voter_reg_col <- as.data.frame(nc_voter_reg_col)
head(nc_voter_reg_col)
##fix names 
colnames(nc_prec@data)[colnames(nc_prec@data)=="white"] <- "white_cb"
colnames(nc_prec@data)[colnames(nc_prec@data)=="black"] <- "black_cb"
colnames(nc_prec@data)[colnames(nc_prec@data)=="other_races"] <- "other_races_cb"
nc_prec$COUNTY_NAM[nc_prec$COUNTY_NAM=="NEW_HANOVER"] <- "NEW HANOVER"

###add some fixes for the precincts 
nc_voter_reg_col$precinct_abbrv[nc_voter_reg_col$precinct_abbrv=="55-11" & nc_voter_reg_col$county_desc=="DURHAM"] <-
  "55"
nc_voter_reg_col$precinct_abbrv[nc_voter_reg_col$precinct_abbrv=="55-49" & nc_voter_reg_col$county_desc=="DURHAM"] <-
  "55"
nc_voter_reg_col$precinct_abbrv[nc_voter_reg_col$precinct_abbrv=="SH#1" & nc_voter_reg_col$county_desc=="GREENE"] <-
  "SH1"
nc_voter_reg_col$precinct_abbrv[nc_voter_reg_col$precinct_abbrv=="H27-A" & nc_voter_reg_col$county_desc=="GUILFORD"] <-
  "H27"
nc_voter_reg_col$precinct_abbrv[nc_voter_reg_col$precinct_abbrv=="H27-B" & nc_voter_reg_col$county_desc=="GUILFORD"] <-
  "H27"
nc_voter_reg_col$precinct_abbrv[nc_voter_reg_col$precinct_abbrv=="G42A" & nc_voter_reg_col$county_desc=="GUILFORD"] <-
  "G42"
nc_voter_reg_col$precinct_abbrv[nc_voter_reg_col$precinct_abbrv=="G42B" & nc_voter_reg_col$county_desc=="GUILFORD"] <-
  "G42"
nc_voter_reg_col$precinct_abbrv[nc_voter_reg_col$precinct_abbrv=="NE22A" & nc_voter_reg_col$county_desc=="ONSLOW"] <-
  "NE22"
nc_voter_reg_col$precinct_abbrv[nc_voter_reg_col$precinct_abbrv=="NE22B" & nc_voter_reg_col$county_desc=="ONSLOW"] <-
  "NE22"
nc_voter_reg_col$precinct_abbrv[nc_voter_reg_col$precinct_abbrv=="1AL A" & nc_voter_reg_col$county_desc=="PAMLICO"] <-
  "1ALA"
nc_voter_reg_col$precinct_abbrv[nc_voter_reg_col$precinct_abbrv=="4VM A" & nc_voter_reg_col$county_desc=="PAMLICO"] <-
  "4VMA"
nc_voter_reg_col$precinct_abbrv[nc_voter_reg_col$precinct_abbrv=="4MSIC" & nc_voter_reg_col$county_desc=="PAMLICO"] <-
  "4VMSIC"
nc_voter_reg_col$precinct_abbrv[nc_voter_reg_col$precinct_abbrv=="ALMOND" & nc_voter_reg_col$county_desc=="SWAIN"] <-
  "Almond"
nc_voter_reg_col$precinct_abbrv[nc_voter_reg_col$precinct_abbrv=="ALARKA" & nc_voter_reg_col$county_desc=="SWAIN"] <-
  "Alarka"
nc_voter_reg_col$precinct_abbrv[nc_voter_reg_col$county_desc=="YANCEY"] <-
  gsub(" ", "-", nc_voter_reg_col$precinct_abbrv)[nc_voter_reg_col$county_desc=="YANCEY"]
nc_voter_reg_col <- subset(nc_voter_reg_col, select=-c(county_id,precinct_desc))

###summarise
nc_voter_reg_col2 <- aggregate(.~county_desc+precinct_abbrv, nc_voter_reg_col, sum)
head(nc_voter_reg_col2)
class(nc_voter_reg_col2) #goood, seems like its taken care of 
###let's see if I can merge on the other data now 
nc_prec2 <- merge(nc_prec, nc_voter_reg_col2, by.x=c("COUNTY_NAM","PREC_ID"),
                  by.y=c("county_desc","precinct_abbrv") )
##let's check the missing data 
nc_prec2missing <- subset(nc_prec2, is.na(bisg_whi)==TRUE)
sort(unique(nc_prec2missing$PREC_ID))
table(nc_prec2missing$COUNTY_NAM)

##saving 
nc_prec2$cb_total <- nc_prec2$white_cb+nc_prec2$black_cb+nc_prec2$other_races_cb
saveRDS(nc_prec2, "data/nc_prec2merged.rds")
nc_prec2 <- readRDS("data/nc_prec2merged.rds")

#####NOTE: at this point, I did some editing in ARCMAPs, changing membership of some precincts so as to fix the 
#contiguity issues. Now I'll want to re-read in the data. 
nc_prec2edits <- readOGR("shpfiles", "nc_precinct_cd_edits4")
nc_new_initcds <- read.dbf("shpfiles/nc_precinct_cd_edits4.dbf", as.is = T)
head(nc_new_initcds)
nc_new_initcds <- subset(nc_new_initcds, select=c(initcd))
##apply new vec 
nc_prec2@data <- cbind(nc_prec2@data,nc_new_initcds)
head(nc_prec2@data )
str(nc_prec2@data)
nc_prec2edits <- nc_prec2
str(nc_prec2edits@data) #everything appears good 
#colnames(nc_prec2edits@data) <- colnames(nc_prec2@data)


tempnc <- nc_prec2edits@data
write.csv(tempnc, "data/nc_cvap_table.csv", row.names = FALSE)

#####now let's set up the simulations ##########################
adj.mat<- gTouches(nc_prec2edits, byid = TRUE)#this worked and will act as the adjacency matrix. 
adj.mat2<-as.matrix(adj.mat)

adj.mat2[1:3,1:3] ##see, it is true/false
#Now create a  0 or 1 matrix from a true or false matrix.

adj.mat3<-adj.mat2*1 # A very easy command. 
diag(adj.mat3) <- 1 #should of course be contiguous with self

### Step 4: Run checks on the matrix 
#how redist checks a matrix ; all should be TRUE 
## Is it square?
squaremat <- (nrow(adj.mat3) == ncol(adj.mat3))
squaremat
## All binary entries?
binary <- ((length(unique(c(adj.mat3))) == 2) &
             (sum(unique(c(adj.mat3)) %in% c(0, 1)) == 2))
binary
## Diagonal elements all 1?
diag <- (sum(diag(adj.mat3)) == nrow(adj.mat3))
diag
## Symmetric?
symmetric <- isSymmetric(adj.mat3)
symmetric

# Step 5: Convert the adjacency matrix to list form
##Note: This will avoid an error present within the redst pkg

#note: so the default adjacency object is a list. 
#Therefore, the script below is from the redist.mcmc code that converts the mat to 
# a list. I should be able to convert it to a list myself 
adjlist <- vector("list", nrow(adj.mat3))
## Loop through rows in matrix
for(i in 1:nrow(adj.mat3)){
  
  ## Extract row
  adjvec <- adj.mat3[,i]
  ## Find elements it is adjacent to
  inds <- which(adjvec == 1)
  ## Remove self-adjacency
  inds <- inds[inds != i]
  ## Zero-index
  inds <- inds - 1
  ## Put in adjlist
  adjlist[[i]] <- inds}
###a check on the list 
minlist <- min(unlist(adjlist))
maxlist <- max(unlist(adjlist))
oneind <- (sum(minlist == 1, maxlist == length(adjlist)) == 2)
zeroind <- (sum(minlist == 0, maxlist == (length(adjlist) - 1)) == 2)

if(oneind){
  ## Zero-index list
  for(i in 1:length(adjlist)){
    adjlist[[i]] <- adjlist[[i]] - 1
  }
}else if(!(oneind | zeroind)){
  ## if neither oneind or zeroind, then stop
  stop("Adjacency list must be one-indexed or zero-indexed")
}
##If this does not stop, all is good 

#list seems fine 
nc_prec2edits$dist_num<-as.numeric(as.character(nc_prec2edits$initcd))

## Step 6: Run the simulations 
##running simulation
options(cores=2) #set multi cores 

start_time <- Sys.time()
alg_census2 <-redist.mcmc(adjobj = adjlist, popvec = nc_prec2edits$cb_total, initcds = nc_prec2edits$dist_num,
                        nsims=10000, nloop = 4, rngseed = 1337,
                        grouppopvec = nc_prec2edits$black_cb,savename = "ga_output/alg_census_store",
                        popcons = .05, contiguitymap = "rooks", verbose = TRUE)
end_time <- Sys.time()
##total time 
end_time - start_time ### 44.64 minutes

###saved with the attachment "loop X" at end; saved as RData ; read those in via a loop 
partition_df <- data.frame()
for (i in 1:4) {
  file_name1 <- paste0("nc_output/alg_census_store",sep="_","loop",sep="",i,sep=".","RData")
  load(file_name1) ###will be saved and named as "algout" ; cna then grab partitions 
  temp_algloop <- algout$partitions
  temp_algloop <- as.data.frame(temp_algloop)
  if(nrow(partition_df)==0){
    partition_df <- temp_algloop
  }else{
    partition_df <- cbind(partition_df, temp_algloop)
  }
  
}
rm(alg_census,algout,alg_census2)

###now everything is saved into the partition_df

###slimming down 
head(nc_prec2edits@data)
nc_prec2edits_wpart <- nc_prec2edits@data

##good, this worked.Now what we'll need to do is run a loop where I can aggregate all of the data by each column. 
#From there, I can look sort all of the rows and such. Then I'll be able to plot everything. 
#Looks like the name of the first col is "1" . The last is of course a col named "10000". IT is the col 10002. 
race_prec_df <- matrix(NA, ncol=13, nrow = 40000)
race_bisg_df <- matrix(NA, ncol=13, nrow = 40000)
race_plural_bisg_df <- matrix(NA, ncol=13, nrow = 40000)

white_prec_df <- matrix(NA, ncol=13, nrow = 40000)
white_bisg_df <- matrix(NA, ncol=13, nrow = 40000)
white_plural_bisg_df <- matrix(NA, ncol=13, nrow = 40000)

temp_nc_masterdf <- cbind(nc_prec2edits_wpart, partition_df)
for(i in 1:40000){
  ###aggregating the vote, given by the values of the ith + 2 column 
  temp_agg <- aggregate(list(temp_nc_masterdf$total,temp_nc_masterdf$cb_total,temp_nc_masterdf$white_cb,
                             temp_nc_masterdf$black_cb, 
                             temp_nc_masterdf$white, temp_nc_masterdf$black,
                             temp_nc_masterdf$bisg_whi, temp_nc_masterdf$bisg_bla,temp_nc_masterdf$white_plural,
                             temp_nc_masterdf$black_plural,
                             temp_nc_masterdf$democrat, temp_nc_masterdf$gop), 
                        by= list(temp_nc_masterdf[,i+22]), sum)
    ###with the new temp_agg object, rename the fields 
  colnames(temp_agg) <- c("district", "total", "cb_total", "white_cb", "black_cb", 
                          "white_prec", "black_prec", "bisg_whi",
                          "bisg_bla", "white_plural", "black_plural", "dem_vote", "gop_vote")
  temp_agg$black_prec_pct <- temp_agg$black_prec/(temp_agg$total)
  temp_agg$black_bisg_pct <- temp_agg$bisg_bla/(temp_agg$total)
  temp_agg$black_plural_pct <- temp_agg$black_plural/(temp_agg$total)
  
  ###white
  temp_agg$white_prec_pct <- temp_agg$white_prec/(temp_agg$total)
  temp_agg$white_bisg_pct <- temp_agg$bisg_whi/(temp_agg$total)  
  temp_agg$white_plural_pct <- temp_agg$white_plural/(temp_agg$total)  
  
  ###get the dem pct values , sorted least to most 
  temp_store_race2 <- sort(temp_agg$black_prec_pct)
  temp_store_race3 <- sort(temp_agg$black_bisg_pct)
  temp_store_race4 <- sort(temp_agg$black_plural_pct)
  
  ##temp store white 
  temp_store_race2w <- sort(temp_agg$white_prec_pct)
  temp_store_race3w <- sort(temp_agg$white_bisg_pct)
  temp_store_race4w <- sort(temp_agg$white_plural_pct)
  
      #store the values into a df 
  race_prec_df[i,] <- temp_store_race2
  race_bisg_df[i, ] <- temp_store_race3
  race_plural_bisg_df[i, ] <- temp_store_race4
  ###white results 
  white_prec_df[i,] <- temp_store_race2w
  white_bisg_df[i, ] <- temp_store_race3w
  white_plural_bisg_df[i, ] <- temp_store_race4w
 
}


race_prec_df <- as.data.frame(race_prec_df)
race_bisg_df <- as.data.frame(race_bisg_df)
race_plural_bisg_df <- as.data.frame(race_plural_bisg_df)
white_prec_df <- as.data.frame(white_prec_df)
white_bisg_df <- as.data.frame(white_bisg_df)
white_plural_bisg_df <- as.data.frame(white_plural_bisg_df)


saveRDS(race_prec_df, "nc_output/mpsa_least2most_blackprec_dist.rds")
saveRDS(race_bisg_df, "nc_output/mpsa_least2most_blackbisg_dist.rds")
saveRDS(race_plural_bisg_df, "nc_output/mpsa_least2most_blackbisg_plural_dist.rds")
saveRDS(white_prec_df, "nc_output/mpsa_least2most_whiteprec_dist.rds")
saveRDS(white_bisg_df, "nc_output/mpsa_least2most_whitebisg_dist.rds")
saveRDS(white_plural_bisg_df, "nc_output/mpsa_least2most_whitebisg_plural_dist.rds")




#### read in the data 
race_prec_df <- readRDS("nc_output/mpsa_least2most_blackprec_dist.rds")
race_bisg_df <- readRDS("nc_output/mpsa_least2most_blackbisg_dist.rds")
race_plural_bisg_df <- readRDS("nc_output/mpsa_least2most_blackbisg_plural_dist.rds")
white_prec_df <- readRDS("nc_output/mpsa_least2most_whiteprec_dist.rds")
white_bisg_df <- readRDS("nc_output/mpsa_least2most_whitebisg_dist.rds")
white_plural_bisg_df <- readRDS("nc_output/mpsa_least2most_whitebisg_plural_dist.rds")

####Now let's plot the data ; this collapses it, and finds the 95% CI, and median estimate
apply(white_prec_df, 2,quantile, probs=c(0.025,.5,.975) )
apply(white_bisg_df, 2,quantile, probs=c(0.025,.5,.975) )
apply(white_plural_bisg_df, 2,quantile, probs=c(0.025,.5,.975) )

###let's get abs diff of matrices 
white_prec_bisg_diff_df <- apply( abs(white_prec_df-white_bisg_df), 2,quantile, probs=c(0.025,.5,.975) )*100
white_prec_plural_diff_df <-apply( abs(white_prec_df-white_plural_bisg_df), 2,quantile, probs=c(0.025,.5,.975) )*100
black_prec_bisg_diff_df <-apply( abs(race_prec_df-race_bisg_df), 2,quantile, probs=c(0.025,.5,.975) )*100
black_prec_plural_diff_df <-apply( abs(race_prec_df-race_plural_bisg_df), 2,quantile, probs=c(0.025,.5,.975) )*100


###Let's now create the plot 

#lowest to highest, as per ordering (V1, V2, etc...)
pctblackCD <- data.frame(V1=0.031, V2=0.116, V3=0.122, V4=0.126, V5=0.147, V6=0.165, V7=0.17, V8=0.175, V9=0.185, V10=0.189, V11=0.324, V12=0.508, V13=0.536)
pctblackCD_IDs <- melt(as.matrix(pctblackCD))
#reshape original data and error bars
black_prec_bisg_diff_df <- rbind(black_prec_bisg_diff_df,pctblackCD)
black_prec_bisg_diff_df_rshp <- melt(as.matrix(black_prec_bisg_diff_df))
#merge to pct black ids
black_prec_bisg_diff_df_rshp <- merge(black_prec_bisg_diff_df_rshp,pctblackCD_IDs, by=c("Var2") )
#reshape again
black_prec_bisg_diff_df_rshp <- reshape(black_prec_bisg_diff_df_rshp, idvar="Var2", v.names = "value.x", timevar = "Var1.x", direction="wide")
#sort
black_prec_bisg_diff_df_rshp <- black_prec_bisg_diff_df_rshp[order(black_prec_bisg_diff_df_rshp$value.y),]


#now plurality difference
#reshape original data and error bars
black_prec_plural_diff_df <- rbind(black_prec_plural_diff_df,pctblackCD)
black_prec_plural_diff_df_rshp <- melt(as.matrix(black_prec_plural_diff_df))
#merge to pct black ids
black_prec_plural_diff_df_rshp <- merge(black_prec_plural_diff_df_rshp,pctblackCD_IDs, by=c("Var2") )
#reshape again
black_prec_plural_diff_df_rshp <- reshape(black_prec_plural_diff_df_rshp, idvar="Var2", v.names = "value.x", timevar = "Var1.x", direction="wide")
#sort
black_prec_plural_diff_df_rshp <- black_prec_plural_diff_df_rshp[order(black_prec_plural_diff_df_rshp$value.y),]


######################### Figure 2 c ########################
#new plot
jpeg("plots/black_bisg_error_sensitivity_nc.jpg", res=600, height = 6, width = 9, units = "in")
###bisg diff
plot(black_prec_bisg_diff_df_rshp$value.y, black_prec_bisg_diff_df_rshp$`value.x.50%`,col="blue", pch=19,
     ylab="% Point Diff in Black Composition", xlab="Congressional District Share Black", ylim=c(0,20), xlim=c(0,0.6), xaxt="n")
axis(1, at = c(0,0.1,0.2,0.3,0.4,0.5,0.6))
arrows(black_prec_bisg_diff_df_rshp$value.y, black_prec_bisg_diff_df_rshp$`value.x.2.5%`, black_prec_bisg_diff_df_rshp$value.y, black_prec_bisg_diff_df_rshp$`value.x.97.5%`,
       length=0.05, angle=90, code=3, col="blue")
#plurality differences
points(black_prec_plural_diff_df_rshp$value.y, black_prec_plural_diff_df_rshp$`value.x.50%`,col="red", pch=19)
arrows(black_prec_plural_diff_df_rshp$value.y, black_prec_plural_diff_df_rshp$`value.x.2.5%`, black_prec_plural_diff_df_rshp$value.y, black_prec_plural_diff_df_rshp$`value.x.97.5%`,
       length=0.05, angle=90, code=3, col="red")

legend("topleft", bty="n",c("Real - BISG Prob. Sums","Real - BISG Plurality"), 
       fill = c("blue","red"), col=c("blue","red"))
dev.off()


#lowest to highest WHITE pct now, as per ordering (V1, V2, etc...)
pctwhiteCD <- data.frame(V1=0.336, V2=0.375, V3=0.53, V4=0.661, V5=0.713, V6=0.722, V7=0.741, V8=0.744, V9=0.78, V10=0.784, V11=0.793, V12=0.822, V13=0.899)
pctwhiteCD_IDs <- melt(as.matrix(pctwhiteCD))
#reshape original data and error bars
white_prec_bisg_diff_df <- rbind(white_prec_bisg_diff_df,pctwhiteCD)
white_prec_bisg_diff_df_rshp <- melt(as.matrix(white_prec_bisg_diff_df))
#merge to pct black ids
white_prec_bisg_diff_df_rshp <- merge(white_prec_bisg_diff_df_rshp,pctwhiteCD_IDs, by=c("Var2") )
#reshape again
white_prec_bisg_diff_df_rshp <- reshape(white_prec_bisg_diff_df_rshp, idvar="Var2", v.names = "value.x", timevar = "Var1.x", direction="wide")
#sort
white_prec_bisg_diff_df_rshp <- white_prec_bisg_diff_df_rshp[order(white_prec_bisg_diff_df_rshp$value.y),]


#now plurality difference
#reshape original data and error bars
white_prec_plural_diff_df <- rbind(white_prec_plural_diff_df,pctwhiteCD)
white_prec_plural_diff_df_rshp <- melt(as.matrix(white_prec_plural_diff_df))
#merge to pct black ids
white_prec_plural_diff_df_rshp <- merge(white_prec_plural_diff_df_rshp,pctwhiteCD_IDs, by=c("Var2") )
#reshape again
white_prec_plural_diff_df_rshp <- reshape(white_prec_plural_diff_df_rshp, idvar="Var2", v.names = "value.x", timevar = "Var1.x", direction="wide")
#sort
white_prec_plural_diff_df_rshp <- white_prec_plural_diff_df_rshp[order(white_prec_plural_diff_df_rshp$value.y),]


################# Figure 2a ##############################
jpeg("plots/white_bisg_error_sensitivity_nc.jpg", res=600, height = 6, width = 9, units = "in")
###bisg diff
plot(white_prec_bisg_diff_df_rshp$value.y,white_prec_bisg_diff_df_rshp$`value.x.50%`,col="blue", pch=19,
     ylab="% Point Diff in White Composition", xlab="Congressional District Share White", ylim=c(0,25), xlim=c(0.3,0.9), xaxt="n")
axis(1, at = c(0.3,0.4,0.5,0.6,0.7,0.8,0.9))
arrows(white_prec_bisg_diff_df_rshp$value.y, white_prec_bisg_diff_df_rshp$`value.x.2.5%`, white_prec_bisg_diff_df_rshp$value.y, white_prec_bisg_diff_df_rshp$`value.x.97.5%`,
       length=0.05, angle=90, code=3, col="blue")
#plurality differences
points(white_prec_plural_diff_df_rshp$value.y, white_prec_plural_diff_df_rshp$`value.x.50%`,col="red", pch=19)
arrows(white_prec_plural_diff_df_rshp$value.y, white_prec_plural_diff_df_rshp$`value.x.2.5%`, white_prec_plural_diff_df_rshp$value.y, white_prec_plural_diff_df_rshp$`value.x.97.5%`,
       length=0.05, angle=90, code=3, col="red")

legend("topleft", bty="n",c("Real - BISG Prob. Sums","Real - BISG Plurality"), 
       fill = c("blue","red"), col=c("blue","red"))
dev.off()
###########################################################




####Let's get the nc_prec data cleaned 
nc_prec2edits$prec_white_bisg_diff <- 
  abs( (nc_prec2edits$white/nc_prec2edits$total) - (nc_prec2edits$bisg_whi/nc_prec2edits$total))
nc_prec2edits$prec_black_bisg_diff <- 
  abs( (nc_prec2edits$black/nc_prec2edits$total) - (nc_prec2edits$bisg_bla/nc_prec2edits$total))
nc_prec2edits$prec_white_bisgp_diff <- 
  abs( (nc_prec2edits$white/nc_prec2edits$total) - (nc_prec2edits$white_plural/nc_prec2edits$total))
nc_prec2edits$prec_black_bisgp_diff <- 
  abs( (nc_prec2edits$white/nc_prec2edits$total) - (nc_prec2edits$black_plural/nc_prec2edits$total))
###now diff between cb and prec results 
nc_prec2edits$cb_white_prec_diff <- 
  abs( (nc_prec2edits$white_cb/nc_prec2edits$cb_total) - (nc_prec2edits$white/nc_prec2edits$total))
nc_prec2edits$cb_black_prec_diff <- 
  abs( (nc_prec2edits$black_cb/nc_prec2edits$cb_total) - (nc_prec2edits$black/nc_prec2edits$total))
###good. Now let's get quantile diff 

##census to prec 
quantile(nc_prec2edits$cb_white_prec_diff, seq(0,1,by=0.05))
quantile(nc_prec2edits$cb_black_prec_diff, seq(0,1,by=0.05))

###prec to bisg 
quantile(nc_prec2edits$prec_white_bisg_diff, seq(0,1,by=0.05))
quantile(nc_prec2edits$prec_black_bisg_diff, seq(0,1,by=0.05))

####Now let's do the density plot, as that seems better display 
nc_prec2edits_df <- nc_prec2edits@data
nc_prec2edits_df_long <- gather(nc_prec2edits_df, key="method", value="pct_diff", 
                                prec_white_bisg_diff:prec_black_bisgp_diff)

################### Figure 1a #############################
white_density <- ggplot(data=subset(nc_prec2edits_df_long, method=="prec_white_bisg_diff" | 
                                      method=="prec_white_bisgp_diff"),aes(x=pct_diff*100,fill=method)) +
  geom_density(alpha=0.3) + theme_minimal() +
  theme(legend.text=element_text(size=8),legend.title = element_text(size=10))+
  scale_fill_discrete(name="Method", labels=c("Prob. Sums (PSM)","Plurality (PM)")) +
  labs(title="White Margin of Error",y="Density",x="% Difference from Reported Race", fill="Method")
white_density
###########################################################

################ Figure 1c #################################
black_density <- ggplot(data=subset(nc_prec2edits_df_long, method=="prec_black_bisg_diff" | 
                                      method=="prec_black_bisgp_diff"),aes(x=pct_diff*100,fill=method)) +
  geom_density(alpha=0.3) + theme_minimal() +
  theme(legend.text=element_text(size=8),legend.title = element_text(size=10))+
  scale_fill_discrete(name="Method", labels=c("Prob. Sums (PSM)","Plurality (PM)")) +
  labs(title="Black Margin of Error",y="Density",x="% Difference from Reported Race", fill="Method")
black_density
###########################################################

####export plots
ggsave("plots/black_density_error_plot_nc.png" ,plot=black_density, scale=1,width=9,height=6,units = c("in"),dpi=600 )
ggsave("plots/white_density_error_plot_nc.png" ,plot=white_density, scale=1,width=9,height=6,units = c("in"),dpi=600 )

##ending time 
end_time_all <- Sys.time()

##total time 
end_time_all-start_time_all 

